# Computes overall descriptive stats on the population count discrepancy at
# precinct level.

library(redist)
library(tidyverse)
library(sf)
library(fs)
library(patchwork)
library(glue)
library(here)

source(here("R/00_custom_functions.R"))


pa_dists <- read_rds(here("data/PA/pa_plans.rds"))
vtd_das19 = read_rds(here("data/PA/pa_vtd_das_19.rds"))
vtd_das12 = read_rds(here("data/PA/pa_vtd_das_12.rds")) %>%
    rename_with(~ str_c(., "_das12"), -precinct)
vtd_das4 = read_rds(here("data/PA/pa_vtd_das_04.rds")) %>%
    rename_with(~ str_c(., "_das4"), -precinct)
vtd_orig = read_rds(here("data/PA/pa_vtd_orig.rds"))
pa_shp = read_rds(here("data/PA/pa_shp.rds")) %>%
    sf::st_as_sf()
pa_map = inner_join(pa_shp, vtd_orig, by="precinct") %>%
    inner_join(vtd_das19, by="precinct", suffix=c("_orig", "_das19")) %>%
    inner_join(vtd_das12, by="precinct") %>%
    inner_join(vtd_das4, by="precinct") %>%
    rename(pop = pop_orig,
           cd_11 = cd,
           v4_pop = pop_das4,
           v12_pop = pop_das12,
           v19_pop = pop_das19,
           pop_black = pop_black_orig,
           v4_pop_black = pop_black_das4,
           v12_pop_black = pop_black_das12,
           v19_pop_black = pop_black_das19
    ) %>%
    mutate(cd = pa_dists[, "court"]) # enacted plan in 2018


la_map <- read_rds(here("data/LA/la.rds"))
nc_map <- read_rds(here("data/NC/nc_shp.rds")) %>%
  rename(cd = cd_17)
sc_map <- read_rds(here("data/SC/sc_shp.rds"))


# select only some variables, then stack, tracking state
pop_df <- map_dfr(
  #.x = list(PA = pa_map, LA = la_map, NC = nc_map, SC = sc_map),
  .x = list(PA = pa_map, LA = la_map, SC = sc_map),
  .f = ~ select(as_tibble(sf::st_drop_geometry(.x)),
                cd, pop, v4_pop, v12_pop, v19_pop, pop_black,
                v4_pop_black, v12_pop_black, v19_pop_black),
  .id = "state")


# sumary stats
summ_diff <- function(tbl) {
  tbl %>%
    summarize(
      v19_diff = mean(abs(v19_pop - pop)),
      v12_diff = mean(abs(v12_pop - pop)),
      v4_diff  = mean(abs(v4_pop - pop)),
      v19_black_diff = mean(abs(v19_pop_black - pop_black)),
      v12_black_diff = mean(abs(v12_pop_black - pop_black)),
      v4_black_diff  = mean(abs(v4_pop_black - pop_black)),
      v19_ratio = v19_diff / mean(pop),
      v12_ratio = v12_diff / mean(pop),
      v4_ratio = v4_diff / mean(pop),
      v19_black_ratio = v19_black_diff / mean(pop_black),
      v12_black_ratio = v12_black_diff / mean(pop_black),
      v4_black_ratio = v4_diff / mean(pop),
      v19_total = (sum(v19_pop) - sum(pop)),
      v12_total = (sum(v12_pop) - sum(pop)),
      v4_total  = (sum(v4_pop) - sum(pop)),
      mean_size = mean(pop),
      median_size = median(pop),
      n = n()
    )
}

pop_sum <- pop_df %>% summ_diff()
pop_state <- pop_df %>% group_by(state) %>%  summ_diff()
pop_cd <- pop_df %>% group_by(state, cd) %>%  summ_diff()

# awkward but direct way to store and track these estimates
write_csv(pop_sum, here("data/numbers/DAS_pop-diff_by-precinct.csv"))
write_csv(pop_state, here("data/numbers/DAS_pop-diff_by-precinct_by-state.csv"))
write_csv(pop_cd, here("data/numbers/DAS_pop-diff_by-precinct_by-CD.csv"))
